package com.haozhen.rdd

/**
  * @author haozhen
  * @email haozh@ync1.com
  * @date 2021/1/30  11:13
  */
object AdlogDemo {

  /**
    * Driver entry point: builds a local SparkContext over data/advert.log.
    * The top-N-ads-per-province pipeline below is kept commented out, as in
    * the original exercise scaffold.
    */
  def main(args: Array[String]): Unit = {
    import org.apache.spark.rdd.RDD
    import org.apache.spark.{SparkConf, SparkContext}

    // val, not var: the conf is never reassigned.
    // getClass on a Scala object yields a class named "...AdlogDemo$";
    // `.init` drops the trailing '$' to get a clean app name.
    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName(this.getClass.getCanonicalName().init)
    val sc = new SparkContext(conf)
    sc.setLogLevel("WARN")

    val N = 3
    val lines = sc.textFile("data/advert.log")

    try {
      /** Top-N ads per province **/
      //    val rdd1: RDD[(String, String)] = lines.map {
      //      line =>
      //        val strings: Array[String] = line.split("\\s+")
      //        (strings(1), strings(4))
      //    }
      //    // Sum impressions per (province, adId) pair
      //    val rdd2: RDD[((String, String), Int)] = rdd1.map {
      //      case (province, adId) => ((province, adId), 1)
      //    }.reduceByKey(_ + _)
      //
      //    val rdd3: RDD[(String, Iterable[(String, Int)])] = rdd2.map {
      //      case ((province, adId), count) => (province, (adId, count))
      //    }.groupByKey()
      //
      //    val rdd4 = rdd3.mapValues {
      //      buffer => buffer.toList.sortWith(_._2 > _._2).take(N).map(_._1).mkString(":")
      //    }
      //    rdd4.collect().foreach(println)
      /** Top-N ads per province per hour */
    } finally {
      // Always release the SparkContext, even if the job above fails.
      // `stop()` keeps the parens: it is a side-effecting call.
      sc.stop()
    }
  }

  /**
    * Extracts the hour-of-day (0-23, JVM default time zone) from a log
    * timestamp string.
    *
    * Bug fix: the original ignored `str` entirely and returned the hour of
    * the *current* wall-clock time (`new DateTime()` = now), making every log
    * line appear to belong to the moment the job ran.
    *
    * @param str epoch-millis timestamp string
    *            NOTE(review): assumed to be the first (millisecond) field of
    *            advert.log — confirm against the actual data file.
    * @return hour of day in the JVM default time zone
    */
  def getHour(str: String): Int = {
    // java.time (JDK standard library) instead of joda-time; same
    // default-zone semantics as joda's `DateTime(millis).getHourOfDay`.
    import java.time.{Instant, ZoneId}
    Instant.ofEpochMilli(str.toLong).atZone(ZoneId.systemDefault()).getHour
  }
}
